#include "Genome_Index_Table.h"
#ifdef _OPENMP
#include <omp.h>
#endif
// For test
#define PRINT_GENOME_SUBSTRING(genomeIndex, slideWindowLength)\
    do {\
       char ref[FILENAME_MAX];\
       this->pgenomeNTInBits->getSubstringInBits(genomeIndex, slideWindowLength).decode(ref);\
       cout << genomeIndex << "\t" << ref << endl;\
    } while(0)

#define GENOME_SUBSTRING_CMP(genomeIndex1, genomeIndex2, slideWindowLength)\
    do {\
       char ref1[FILENAME_MAX];\
       char ref2[FILENAME_MAX];\
       this->pgenomeNTInBits->getSubstringInBits(genomeIndex1, slideWindowLength).decode(ref1);\
       this->pgenomeNTInBits->getSubstringInBits(genomeIndex2, slideWindowLength).decode(ref2);\
       if(strcmp(ref1, ref2) != 0) {\
            cout << genomeIndex1 << ',' << ref1 << "\t" << endl;\
            cout << genomeIndex2 << ',' << ref2 << "\t" << endl;\
       }\
    } while(0)


CGenome_Index_Table::CGenome_Index_Table(void)
{
    this->initialization();
}

CGenome_Index_Table::~CGenome_Index_Table(void)
{

}

int CGenome_Index_Table::initialization(void)
{
    this->num_of_repeat_patterns = 0;
    this->pbaRepeatRepresentativeFlag = NULL;
    this->pbaRepeatMaskedFlag = NULL;
    return(0);
}

int CGenome_Index_Table::getGenomeNTdata(const char*  genomeListfileName, string refFormat = "")
{
    if (hasTheExtName(genomeListfileName, ".fasta") ||
            hasTheExtName(genomeListfileName, ".fna") ||
            hasTheExtName(genomeListfileName, ".mfa") ||
            hasTheExtName(genomeListfileName, ".dat") ||
            hasTheExtName(genomeListfileName, ".fa") ||
            refFormat == "fasta") {
        const bool bFastaFormat = true;
        this->pgenomeNT = new CGenomeNTdata();
        this->pgenomeNT->addChromosome(genomeListfileName, bFastaFormat);
    } else {
        this->pgenomeNT = new CGenomeNTdata(genomeListfileName);
    }
    return(0);
}

bool CGenome_Index_Table::read_index_table(const char* indexFilePath, bool bPrintErrMsg)
{
    bool sucessfuallyReadTable = true;
    if (!fileExist(indexFilePath)) {
        if (hasTheExtName(indexFilePath, ".index")) {
            if (bPrintErrMsg) {
                LOG_INFO("Info %d: Index file %s haven't been built in %s.\n",\
                         INFO_LOG, indexFilePath, get_working_directory().c_str());
            }
        }
        return(false);
    }
    if (this->pgenomeNT == NULL) {
        this->pgenomeNT = new CGenomeNTdata();
    }
    if (this->pgenomeNTInBits == NULL) {
        this->pgenomeNTInBits = new CGenomeInBits();
    }
    FILE* fp = fopen(indexFilePath, "rb");
    if (fscanf(fp, "%s\n", this->caRefName) == 0) {
        ERR;
    }
    if (readRefInBinFile(fp, this->pgenomeNTInBits, this->pgenomeNT)) {
        LOG_INFO("Info %d: Fail to readin ref from index file\n", INFO_LOG);
        return(false);
    }
    TIME_INFO(sucessfuallyReadTable = this->read_Hash_Table(fp), "Read in index ");
    TIME_INFO(check_masked_flags(), "Masked region built");
    fclose(fp);
    if (this->chooseHashFunction(this->uiRead_Length, \
                                 this->chosenSeedId, bMapReadInColors) < 0) {
        sucessfuallyReadTable = false;
    }
    // When table is built, this is set after bucket the index
    if (bEXTEND_SEED) {
        this->uiSeedLength = this->uiRead_Length;
    }
    if (sucessfuallyReadTable) {
        LOG_INFO("Info %d: Successfully read in the index\n", INFO_LOG);
    } else {
        LOG_INFO("Info %d: No saved index is found. Building a new index\n", INFO_LOG);
    }
    return(sucessfuallyReadTable);
}

bool CGenome_Index_Table::make_index_table(unsigned int uiReadLength, unsigned int uiSeedId,
        bool bMapReadInColors, bool makedMathRepeats)
{
    if ( uiReadLength > MAX_READ_LENGTH) {
        uiReadLength /= 2; // Make sure the read length is in the range
    }
    delete this->pbaRepeatMaskedFlag;
    this->pbaRepeatMaskedFlag = new CboolFlagArray(this->pgenomeNT->iGenomeSize);
    delete this->pbaRepeatRepresentativeFlag;
    this->pbaRepeatRepresentativeFlag = new CboolFlagArray(this->pgenomeNT->iGenomeSize);
    if (makedMathRepeats) {
        LOG_INFO("Info %d: Identify Maskable mathmatical repeats\n", INFO_LOG);
        if (find_maskable_mathmatical_repeats(uiReadLength, uiSeedId) < 0) {
            LOG_INFO("Info %d: Fail to find mathmatical repeats\n", ERROR_LOG);
            return(false);
        }
    }

    const int USE_HASH_KEY_TO_SORT = 0; // Sort by the default hash key, by setting it to 0.
    if (this->chooseHashFunction(uiReadLength, uiSeedId, bMapReadInColors) < 0) {
        return(false); // fail to choose the hash function.
    }
    if (this->pHashIndexTable != NULL) {
        delete this->pHashIndexTable;
        this->pHashIndexTable = NULL;
    }
    this->pHashIndexTable = new CHashIndexT(NO_OF_BUCKET);
    // this->add_repeat_masked_flags(); // Read in the masked region from fixed file and set the flag
    TIME_INFO(this->countBucketSize(), "Count bucket size "); // counting each bucket size
    TIME_INFO(this->hashKmer2Bucket(), "Hash record "); // put each element into a bucket.
    TIME_INFO(this->sortTable(USE_HASH_KEY_TO_SORT), "Sort table "); // sort in each bucket, using
    TIME_INFO(this->check_masked_flags(), "Check masked loci "); // put each element into a bucket.
    LOG_INFO("Info %d: Successfully made the index\n", INFO_LOG);
    return(true);
}

bool CGenome_Index_Table::save_index_table(const char* indexFilePath, bool bPrintErrMsg)
{
    this->indexFileName = get_index_path(string(indexFilePath));
    remove( this->indexFileName.c_str() ); // delete the previous indexa
    string tmpIndexFileN = chExtName(this->indexFileName, ".tmp"); // save the incomplete index in *.tmp
    LOG_INFO("Info %d: Save index to %s as tmp\n", CONFIG_LOG, tmpIndexFileN.c_str());
    FILE *fp = fopen(tmpIndexFileN.c_str(), "wb+");
    fprintf(fp, "%s\n", this->caRefName);
    if (saveRefInBinFile(fp, this->pgenomeNTInBits, this->pgenomeNT)) {
        if (bPrintErrMsg) {
            LOG_INFO("Info %d: Fail to save ref in binary to %s\n", ERROR_LOG, tmpIndexFileN.c_str());
        }
        return(false);
    }
    TIME_INFO(save_Hash_Table(fp), "Index table saved to disk");
    fclose(fp);
    if (rename(tmpIndexFileN.c_str(), indexFileName.c_str())) {
        if (bPrintErrMsg) {
            LOG_INFO("Info %d: ERR to rename %s to index file\n"\
                     , ERROR_LOG, tmpIndexFileN.c_str());
        }
        return(false);
    }
    // DEBUG this->read_index_table(this->indexFileName.c_str());
    return(true);
}

string CGenome_Index_Table::get_index_path(string indexFilePath)
{
    string defaultFileName = default_index_path(this->caRefName, this->bMapReadInColors,\
                             this->chosenSeedId, this->uiRead_Length);
    if (is_accessible_directory(indexFilePath.c_str())) {
        indexFilePath = getFullPath(indexFilePath, defaultFileName);
    }

    if (isPathWritable(indexFilePath.c_str())) {
        this->indexFileName = string(indexFilePath);
    } else {
        this->indexFileName = defaultFileName;
        // const char* pindexFileName = indexFilePath.c_str();
        if (indexFilePath != "") {
            LOG_INFO("Info %d: Path %s is not writable.\nUse default Path %s instead\n"\
                     , WARNING_LOG, indexFilePath.c_str(), this->indexFileName.c_str());
        }
    }
    return(this->indexFileName);
}

// this function will first build "suffix-like array" and compare the neighbor substring to find all mathamatical repeat in uiReadLength + uiNoOfShift
int CGenome_Index_Table::find_maskable_mathmatical_repeats(unsigned int uiReadLength, unsigned int uiSubThreshold)
{
    bool originalbMapReadInColors = this->bMapReadInColors; // Save the original setting
    this->bMapReadInColors = false; // Find the uiReadLength + this->uiNoOfShift repeat without consider color space or not

    if (this->chooseHashFunction(uiReadLength, uiSubThreshold, false) < 0) {
        return(-1);
    }

    if (this->pHashIndexTable != NULL) {
        delete this->pHashIndexTable;
        this->pHashIndexTable = NULL;
    }
    this->pHashIndexTable = new CHashIndexT(NO_OF_BUCKET);

    unsigned int originalReadLengthInBits = CReadInBits::iReadLength;
    CReadInBits::iReadLength = uiReadLength + this->uiNoOfShift;
    // (uiReadLength + this->uiNoOfShift) bp substring with N won't be bucket to sort.
    this->uiSeedLength = uiReadLength + this->uiNoOfShift; // (this->uiSeedLength was set in chooseHashFunction)

    TIME_INFO(this->countBucketSize(), "Count bucket size "); // counting each bucket size
    TIME_INFO(this->hashKmer2Bucket(), "Hash record created "); // put each element into a bucket.
    TIME_INFO(this->sortTable(this->uiNoOfShift + this->uiRead_Length), "Sort table "); // sort in each bucket
    TIME_INFO(this->find_mathmatical_repeats(), "Find math repeats"); // check the repeat and put into a flag array this->pbaRepeatMaskedFlag
    // TIME_INFO(this->check_masked_flags(), "Check masked loci "); // set the flag for each loci its following windows contain 'N'
    // The this->pHashIndexTable and this->pIndexTable will be deleted and new when make_index_table();

    CReadInBits::iReadLength = originalReadLengthInBits; // Push back the original setting
    this->bMapReadInColors = originalbMapReadInColors;
    return(0);
}

int CGenome_Index_Table::chooseHashFunction(unsigned int uiReadLength, \
        int chosenSeedId, \
        bool bMapReadInColors)
{
    CReadInBits::iReadLength = (int) uiReadLength;
    this->uiRead_Length = uiReadLength;
    this->chosenSeedId = chosenSeedId ;
    this->bMapReadInColors = bMapReadInColors;
    if (bMapReadInColors) {
        return(CGenome_Index::chooseHashFunction(uiReadLength - 1, chosenSeedId ));
    } else {
        return(CGenome_Index::chooseHashFunction(uiReadLength, chosenSeedId ));
    }
}

// Private function called by Construct_IndexTable, which do the counting for each bucket - Step 1
unsigned int CGenome_Index_Table::countBucketSize(void)
{
    unsigned int kmer_length; // Length of the sliding windows on the reference.
    if (this->bMapReadInColors) {// Need one more base to get colors for index.
        kmer_length = this->uiSeedLength + 1;
    } else {
        kmer_length = this->uiSeedLength;
    }

    unsigned int uiNonMaskedLoci = 0;
    for (unsigned int chrId = 0; chrId < this->pgenomeNT->iNo_of_chromosome; chrId++) {
        uiNonMaskedLoci += countBucketSize4Chr(chrId, kmer_length);
    }
    unsigned int returnV = this->bucketCount2Index(uiNonMaskedLoci);
    return(returnV);
}

unsigned int CGenome_Index_Table::countBucketSize4Chr(int chrId, unsigned int kmer_length)
{
    unsigned int uiNonMaskedLoci = 0;
    CchromosomeNTdata* pChr = this->pgenomeNT->paChromosomes[chrId];
    unsigned int* counter = this->pHashIndexTable->aiIndexTable;
    unsigned int chrIndexStart = this->pgenomeNT->chrIndex2genomelocusID(chrId, 0);

    /*  int th_id = 0, no_CPU = 1;
        unsigned int countersNoPerCpu = this->pHashIndexTable->uiSize;

    #ifdef _OPENMP // Parallelization
        no_CPU = omp_get_num_procs();
        countersNoPerCpu /= no_CPU;
        cout << "Divide counters into " << countersNoPerCpu << "counters per CPU" << endl; // DEBUG
    #pragma omp parallel private(th_id)
        {
        th_id = omp_get_thread_num();
    #endif
    */
    for (unsigned int chrIndex = 0; chrIndex < pChr->iChromosome_size - this->uiSeedLength; chrIndex ++) {
        unsigned int genomeIndex = chrIndexStart + chrIndex;
        unsigned int uiHashValue = 0;
        CReadInBits kmerInBits;
        bool goodKmer = this->pgenomeNTInBits->fragACGTKmerInBits(kmerInBits, genomeIndex, kmer_length);
        if (goodKmer && !this->pbaRepeatMaskedFlag->b(genomeIndex) /* if not masked */) {
            /*char caRead[wordSize];
            kmerInBits.decode(caRead); // check debug*/
            if (this->bMapReadInColors) { // put in index for colors instead of bases.
                kmerInBits = bases2PureColors(kmerInBits);
            }
            uiHashValue = this->getHashValue(kmerInBits);
            // if (th_id == 0)
            uiNonMaskedLoci++;
            // if (uiHashValue / countersNoPerCpu == (unsigned int)th_id)
            counter[uiHashValue]++; // In crease the count to the bucket
        }
    }
    /*
    #ifdef _OPENMP // Parallelization
        }
    #endif
    */
    return(uiNonMaskedLoci);
}

// private function only called by countBucketSize
unsigned int CGenome_Index_Table::bucketCount2Index(unsigned int uiNonMaskedLoci)
{
    bool error = false;
    // Let the bucket size counter become hash index to each bucket
    if (this->pHashIndexTable->aiIndexTable[this->pHashIndexTable->uiSize] != 0) {
        ERR;
        error = true;
    }
    this->pHashIndexTable->Counter2Index();
    if (this->pHashIndexTable->aiIndexTable[this->pHashIndexTable->uiSize] !=
            this->pHashIndexTable->aiIndexTable[this->pHashIndexTable->uiSize -1]) {
        ERR;
        error = true;
    }

    // The last record shows the total nubmer of Kmers that have been hashed to the buckets
    this->size = this->pHashIndexTable->aiIndexTable[this->pHashIndexTable->uiSize];
    if (this->size != uiNonMaskedLoci) {
        ERR;
        cout << "The real bucket size is " << this->size << endl;
        cout << "The exp bucket size is " << uiNonMaskedLoci << endl;;
        error = true;
    }
    if(error) {
        return(0);
    } else {
        return(this->size);
    }
}

// Hash the Kmer to the HashIndexTable bucket, without sorting the dHashkey
int CGenome_Index_Table::hashKmer2Bucket(void)
{
    delete [] this->pIndexTable;
    this->pIndexTable = new CIndex_Type[this->size];
    // Note there are this->pHashIndexTable->uiSize buckets but this->pHashIndexTable->uiSize + 1 bucket pointers

    unsigned int uiNonMaskedLoci = 0;
    for (unsigned int chrId = 0; chrId < this->pgenomeNT->iNo_of_chromosome; chrId++) {
        unsigned int kmer_length; // Length of the sliding windows on the reference.
        if (this->bMapReadInColors) { // Need one more base to get colors for index.
            kmer_length = this->uiSeedLength + 1;
        } else {
            kmer_length = this->uiSeedLength;
        }
        uiNonMaskedLoci += this->hashKmer2Bucket4Chr(chrId, kmer_length);
    }
    if (this->size != uiNonMaskedLoci) {
        ERR;
        return(1);
    }
    return(0);
}

int CGenome_Index_Table::hashKmer2Bucket4Chr(int chrId, unsigned int kmer_length)
{
    unsigned int uiNonMaskedLoci = 0;
    CchromosomeNTdata* pChr = this->pgenomeNT->paChromosomes[chrId];
    unsigned int chrIndexStart = this->pgenomeNT->chrIndex2genomelocusID(chrId, 0);
    /*
        int th_id = 0, no_CPU = 1;
        unsigned int countersNoPerCpu = this->pHashIndexTable->uiSize;
    #ifdef _OPENMP // Parallelization
        no_CPU = omp_get_num_procs();
        countersNoPerCpu /= no_CPU;
        cout << "Divide counters into " << countersNoPerCpu << " counters per CPU" << endl; // DEBUG
    #pragma omp parallel private(th_id)
        {
            th_id = omp_get_thread_num();
    	}
    #endif
    */
    for (unsigned int chrIndex = 0; chrIndex < pChr->iChromosome_size - this->uiSeedLength; chrIndex++) {
        unsigned int genomeIndex = chrIndexStart + chrIndex;
        unsigned int uiHashValue = 0;
        unsigned int uiTableIndex = 0;
        CReadInBits kmerInBits;
        bool goodKmer = this->pgenomeNTInBits->fragACGTKmerInBits(kmerInBits, genomeIndex, kmer_length);
        if (goodKmer && !this->pbaRepeatMaskedFlag->b(genomeIndex) /* if not masked */) {
            if (this->bMapReadInColors) { // put in index for colors instead of bases.(
                kmerInBits = bases2PureColors(kmerInBits);
            }
            uiHashValue = this->getHashValue(kmerInBits);

            // Check, if the Bucket counting and the Hashing assign is consistent, it should be the same.
            if (this->pHashIndexTable->aiIndexTable[uiHashValue] < 0) {
                LOG_INFO("\nInfo %d: Inconsistent in bucket size. \n", WARNING_LOG);
            } else {
                // if (th_id == 0)
                uiNonMaskedLoci++;
                //if (uiHashValue / countersNoPerCpu == (unsigned int)th_id)
                {
                    // Put the record at -1 position of the stack
                    this->pHashIndexTable->aiIndexTable[uiHashValue]--;
                    uiTableIndex = this->pHashIndexTable->aiIndexTable[uiHashValue];
                    this->pIndexTable[uiTableIndex] = genomeIndex;
                }
            }
        }
    }
    /*
    #ifdef _OPENMP // Parallelization
        }
    #endif
    */
    return(uiNonMaskedLoci);
}

bool CGenome_Index_Table::compareKey(CIndex_Type I1, CIndex_Type I2)
{
    CReadInBits ref1, ref2;
    if (this->bMapReadInColors) {
        ref1 = this->pgenomeNTInBits->getSubstringInBits(I1, this->uiSeedLength + 1);
        ref2 = this->pgenomeNTInBits->getSubstringInBits(I2, this->uiSeedLength + 1);
        ref1 = bases2PureColors(ref1); // ref1 is now in pure colors
        ref2 = bases2PureColors(ref2); // ref2 is now in pure colors
    } else {
        ref1 = this->pgenomeNTInBits->getSubstringInBits(I1, this->uiSeedLength);
        ref2 = this->pgenomeNTInBits->getSubstringInBits(I2, this->uiSeedLength);
    }
    unsigned int key1 = this->getSeedKey(ref1);
    unsigned int key2 = this->getSeedKey(ref2);
    return(key1 < key2);
}

// return true if the corresponding substring I1 < I2
bool CGenome_Index_Table::compareSubstring(CIndex_Type I1, CIndex_Type I2, unsigned int slidingWindows)
{
    CReadInBits ref1, ref2;
    ref1 = this->pgenomeNTInBits->getSubstringInBits(I1, slidingWindows);
    ref2 = this->pgenomeNTInBits->getSubstringInBits(I2, slidingWindows);
    return(ref1 < ref2);
}

// return true if the corresponding substring is the same
bool CGenome_Index_Table::sameSubstring(CIndex_Type I1, CIndex_Type I2, unsigned int slidingWindows)
{
    CReadInBits ref1, ref2;
    ref1 = this->pgenomeNTInBits->getSubstringInBits(I1, slidingWindows);
    ref2 = this->pgenomeNTInBits->getSubstringInBits(I2, slidingWindows);
    return(ref1 == ref2);
}

// A function for debug
unsigned int CGenome_Index_Table::showBucketPtr(const char* filename)
{
    ofstream bucketCount;
    bucketCount.open(filename, std::iostream::out);
    ofstream & pOut = bucketCount;
    //ostream & pOut = cout;
    for (int i = 0; i < (int)this->pHashIndexTable->uiSize; i++) {
        unsigned int BucketStart = this->pHashIndexTable->aiIndexTable[i];
        unsigned int NextBucketStart = this->pHashIndexTable->aiIndexTable[i + 1];
        if(BucketStart != NextBucketStart) {
            pOut << "Bucket " << i << "BucketStart " << BucketStart << endl;
        }
    }
    pOut << "Bucket No " << this->pHashIndexTable->uiSize << endl; // DEBUG
    pOut.close();
    return(this->pHashIndexTable->aiIndexTable[this->pHashIndexTable->uiSize - 1]);
}

int CGenome_Index_Table::sortTable(unsigned int substringLength)
{
    if (bEXTEND_SEED) {
        // The previous seed Length is the "shortest seed length" to exclude 'N' when hash genome Index into bins.
        // The longest  seed length in the exteneded seed mode is the read length.
        this->uiSeedLength = this->uiRead_Length;
    }
    CcompareFunctor4Sort sortFunctor(this, substringLength);

#ifdef _OPENMP // Parallelization 
    int numberOfCPUs = omp_get_num_procs();
    LOG_INFO("\nInfo %d: Sort buckets using %d CPUs %s.\n",\
             INFO_LOG, numberOfCPUs, BLANK_LINE);
    #pragma omp parallel for
#endif // Because OpenMP 2.5 only support signed integer loop. Case the uiSize. The number of bucket is limited to signed int  
    for (int i = 0; i < (int)this->pHashIndexTable->uiSize; i++) {
        unsigned int BucketStart = this->pHashIndexTable->aiIndexTable[i];
        unsigned int NextBucketStart = this->pHashIndexTable->aiIndexTable[i + 1];
        // No need to sort if bucket is empty or has only one element
        if (NextBucketStart > BucketStart + 1) {
            // Put the functor for sorting (sorting use the hashkey generated by the corresponding genome sequence)
            std::sort(&this->pIndexTable[BucketStart], &this->pIndexTable[NextBucketStart], sortFunctor);
        } else if (NextBucketStart < BucketStart) {
            cout << "NextBucketStart:" << NextBucketStart << "BucketStart:" << BucketStart << endl;
            ERR;
        } // else empty bucket or single element, no need to sort
    }
    return(0);
}


// Find math repeat by checking the neighbor of index array in a bucket.
// Build an flags arrays for masked mathematical repeats. This is imcomplete.
int CGenome_Index_Table::find_mathmatical_repeats(void)
{
    int num_of_repeats = 0;
    unsigned int slideWindowLength = this->uiRead_Length + this->uiNoOfShift;

    // char outputFileName[FILENAME_MAX];
    // sprintf(outputFileName, "maskable_repeat_%s_%d_%d.txt", this->caRefName, this->uiRead_Length, this->uiNoOfShift);
    ofstream ofile; // (outputFileName); Don't open the file for

    for (unsigned int i = 0; i < this->pHashIndexTable->uiSize; i++) {
        unsigned int BucketStart = this->pHashIndexTable->aiIndexTable[i];
        unsigned int NextBucketStart = this->pHashIndexTable->aiIndexTable[i + 1];
        num_of_repeats += find_mathmatical_repeats_in_a_bucket(ofile, BucketStart, NextBucketStart);
    }

    cout << num_of_repeats << " repeats with " << num_of_repeat_patterns  << " patterns in " << slideWindowLength << " bp are found!" << endl;

    ofile.close();
    return(num_of_repeats);
}

// Find math repeat by checking the neighbor of index array in a bucket.
int CGenome_Index_Table::find_mathmatical_repeats_in_a_bucket(ofstream& ofile, unsigned int BucketStart, unsigned int NextBucketStart)
{
    int num_of_repeats = 0;
    int num_of_repeats_in_the_pattern = 0;
    unsigned int slideWindowLength = this->uiRead_Length + this->uiNoOfShift;

    bool bRepeat = false;
    // No need to check if the bucket is empty or has only one element
    for (unsigned int j = BucketStart; j + 1 < NextBucketStart; j++) {
        //PRINT_GENOME_SUBSTRING(this->pIndexTable[j], 50); // check, print the string
        unsigned int genomeIndex1 = this->pIndexTable[j];
        unsigned int genomeIndex2 = this->pIndexTable[j + 1];
        unsigned int representativeRepeatStartGIndex;

        if (this->sameSubstring(genomeIndex1, genomeIndex2, slideWindowLength)) { // if there is repeat.
            if ( bRepeat == false ) { // new repeat pattern
                this->num_of_repeat_patterns++;
                num_of_repeats++;
                num_of_repeats_in_the_pattern = 1;
                // record the pattern
                char ref[FILENAME_MAX];
                this->pgenomeNTInBits->getSubstringInBits(genomeIndex1, slideWindowLength).decode(ref);
                representativeRepeatStartGIndex = genomeIndex1 + this->uiNoOfShift;
                ofile << representativeRepeatStartGIndex << '\t' << &ref[this->uiNoOfShift] << '\t';
                // set the "shifts" number is duplicated if mapped
                for (unsigned int i = 0; i < this->uiNoOfShift; i++) {
                    unsigned int duplicatedLocus = genomeIndex1 + i;
                    this->pbaRepeatRepresentativeFlag->setflag(duplicatedLocus, true);
                }
            }

            // PRINT_GENOME_SUBSTRING(genomeIndex, 50); // check, print the string
            // masked the duplicate pattern
            unsigned int maskableRepeatStartIndex = genomeIndex2 + this->uiNoOfShift;
            this->pbaRepeatMaskedFlag->setflag(maskableRepeatStartIndex, true); // mask the maskable repeat
            num_of_repeats++;
            num_of_repeats_in_the_pattern ++;
            bRepeat = true;
            ofile << ',' << maskableRepeatStartIndex;
        } else {
            if (bRepeat == true) {
                ofile << '\t' << num_of_repeats_in_the_pattern << endl; // end of an repeat pattern
            }
            bRepeat = false;
            num_of_repeats_in_the_pattern = 0;
        }
    }
    if (bRepeat == true) {
        ofile << '\t' << num_of_repeats_in_the_pattern << endl; // end of an repeat pattern
    }
    return(num_of_repeats);
}

// Set a flag for masked region, which is a read-length-long sliding windows with 'N' in it.
int CGenome_Index_Table::check_masked_flags(void)
{
    unsigned int uiNonMaskedLoci = 0;
    if (this->pbaMaskedFlag == NULL) {
        this->pbaMaskedFlag = new CboolFlagArray(this->pgenomeNT->iGenomeSize + 1);
    } // Default value of each flag is false

    for (unsigned int chrId = 0; chrId < this->pgenomeNT->iNo_of_chromosome; chrId++) {
        char* pChrNT = this->pgenomeNT->paChromosomes[chrId]->caChromosome;
        unsigned int chrLength = this->pgenomeNT->paChromosomes[chrId]->iChromosome_size;

        unsigned int last_masked_base_dis = 0;
        unsigned int genomeLoucsIndex = this->pgenomeNT->chrIndex2genomelocusID(chrId, 0);
        for (unsigned int i = 0; i < chrLength; i++, genomeLoucsIndex++) {
            // Set masked by default
            this->pbaMaskedFlag->setflag(genomeLoucsIndex, true);
            bool thisLocusIsN;
            if (pChrNT == NULL) {
                thisLocusIsN = this->pgenomeNTInBits->pNBits->b(genomeLoucsIndex);
            } else {
                thisLocusIsN = (pChrNT[i] == 'N');
            }

            if (thisLocusIsN) {
                last_masked_base_dis = 0;
            } else {
                last_masked_base_dis ++;
            }
            if (last_masked_base_dis >= this->uiRead_Length) { // If the distance to the previous N is larger than read length
                // genomeLoucsIndex >= this->uiRead_Length becasue last_masked_base_dis was initialize to 0
                this->pbaMaskedFlag->setflag(genomeLoucsIndex + 1 - this->uiRead_Length, false);
                uiNonMaskedLoci ++;
            }
        }
        // The tail of chromosome region are masked.
    }
    LOG_INFO("Info %d: %u out of %u loci are unmasked\n", FINE_LOG, uiNonMaskedLoci, this->pgenomeNT->iGenomeSize);
    return(0);
}


// add repeat masked region from fixed files and set another flags. The region won't be put in the index table.
int CGenome_Index_Table::add_repeat_masked_flags(void)
{
    if (this->pbaRepeatMaskedFlag == NULL) {
        this->pbaRepeatMaskedFlag = new CboolFlagArray(this->pgenomeNT->iGenomeSize + 1);
    } // Default value of each flag is false

    for (unsigned int chrId = 0; chrId < this->pgenomeNT->iNo_of_chromosome; chrId++) {
        unsigned int chrLength = this->pgenomeNT->paChromosomes[chrId]->iChromosome_size;
        unsigned int chrEndGenomeLocus = this->pgenomeNT->chrIndex2genomelocusID(chrId, 0) + chrLength - 1;

        // Open the files that contained the masked regions in (masked start, masked end) in each line
        char repeatMaskRegionsFile[FILENAME_MAX];
        sprintf(repeatMaskRegionsFile, "deadzone_Maskable_%u_%u.txt", this->uiRead_Length, chrId);
        ifstream ifile(repeatMaskRegionsFile);
        if (!ifile.good()) {
            cout << "Can't open file " << repeatMaskRegionsFile << endl;
            continue;
        }

        while (true) {
            char caBuffer[FILENAME_MAX];
            caBuffer[0] = '\0';
            ifile.getline(caBuffer, FILENAME_MAX);
            if (caBuffer[0] == '\0') {
                break; // end of the file
            }
            // Get the masked region number and set the flag. Assume the index also starts from 0.
            unsigned int maskStart = 0;
            unsigned int maskEnd = 0;
            int retrievedItem = sscanf(caBuffer, "%u%u", &maskStart, &maskEnd);
            if(retrievedItem > 0) {
                unsigned int maskSpan = maskEnd - maskStart + 1;
                unsigned int genomeLoucsIndex = this->pgenomeNT->chrIndex2genomelocusID(chrId, maskStart);
                if ( genomeLoucsIndex + maskSpan < chrEndGenomeLocus) {
                    for (unsigned int j = genomeLoucsIndex; j < genomeLoucsIndex + maskSpan; j++) {
                        this->pbaRepeatMaskedFlag->setflag(j, true);
                    }
                }
            }
        }
    }

    unsigned int uiRepeatMaskedLoci = 0;
    for (unsigned int i = 0; i < this->pgenomeNT->iGenomeSize; i++) {
        if (this->pbaRepeatMaskedFlag->b(i)) {
            uiRepeatMaskedLoci++;
        }
    }
    cout << uiRepeatMaskedLoci << " out of " << this->pgenomeNT->iGenomeSize;
    cout << " loci are skip as maskable repeats." << endl;
    return(0);
}

bool testTable(CGenome_Index_Table& table)
{
    // FOR TEST Purpose
    // (1) Each Index in the table contains no N wihtin the sliding windows of length (this->readlength - this->iNoOfShift)
    for (unsigned int i = 0; i < table.pHashIndexTable->uiSize; i++) {
        unsigned int BucketStart, NextBucketStart;
        BucketStart = table.pHashIndexTable->aiIndexTable[i];
        NextBucketStart = table.pHashIndexTable->aiIndexTable[i + 1];
        for (CIndex_Type* it = &(table.pIndexTable[BucketStart]); \
                it < &(table.pIndexTable[NextBucketStart]); it++) {
            if (table.pgenomeNTInBits->pNBits->b(*it, table.uiRead_Length - table.uiNoOfShift)) {
                cout << "Wrongly put masked region to the index " << endl;
                ERR
            }
            /* Check if the reads is sorted
            CReadInBits kmerInBits = this->pgenomeNTInBits->getSubstringInBits(*it, this->uiRead_Length);
            kmerInBits = bases2PureColors(kmerInBits);
            printBitsStr(kmerInBits, this->uiRead_Length - 1);
            unsigned int uiHashValue = this->getHashValue(kmerInBits);
            unsigned int uiSeedKey = this->getSeedKey(kmerInBits);
            cout << "Got you " << *it << ',' << uiHashValue << ',' << this->getSeedKey(kmerInBits) << endl;
            */
        }
    }

    return(0);
}
